Chapter 7 Diversity analyses

load("data/data.Rdata")

7.1 Alpha diversity

# Calculate Hill numbers
richness <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 0) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(richness = 1) %>%
  rownames_to_column(var = "sample")

neutral <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(neutral = 1) %>%
  rownames_to_column(var = "sample")

phylogenetic <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, tree = genome_tree) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(phylogenetic = 1) %>%
  rownames_to_column(var = "sample")

# Aggregate basal GIFT into elements
#Get list of present MAGs
present_MAGs <- genome_counts_filt %>%
    column_to_rownames(var = "genome") %>%
        filter(rowSums(.[, -1]) != 0) %>%
        rownames()

#Align KEGG annotations with present MAGs and remove all-zero and all-one traits
present_MAGs <- present_MAGs[present_MAGs %in% rownames(genome_gifts)]
genome_gifts_filt <- genome_gifts[present_MAGs,] %>%
            select_if(~!all(. == 0)) %>%  #remove all-zero modules
            select_if(~!all(. == 1)) #remove all-one modules

#Filter count table to only contain present MAGs after KEGG filtering
genome_counts_filt_filt <- genome_counts_filt %>%
  column_to_rownames(var = "genome")

genome_counts_filt_filt <- genome_counts_filt_filt[present_MAGs,]


dist <- genome_gifts_filt %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

functional <- genome_counts_filt_filt %>%
  #column_to_rownames(var = "genome") %>%
  #dplyr::select(where(~ !all(. == 0))) %>%
  hilldiv(., q = 1, dist = dist) %>%
  t() %>%
  as.data.frame() %>%
  dplyr::rename(functional = 1) %>%
  rownames_to_column(var = "sample") %>%
  mutate(functional = if_else(is.nan(functional), 1, functional))

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample == sample)) %>%
  full_join(phylogenetic, by = join_by(sample == sample)) %>%
  full_join(functional, by = join_by(sample == sample))
alpha_div %>%
  pivot_longer(-sample, names_to = "alpha", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    group_by(alpha)%>%
    summarise(
              Aisa_mean=mean(value[Transect=="Aisa"], na.rm=T),
              Aisa_sd=sd(value[Transect=="Aisa"], na.rm=T),
              Aran_mean=mean(value[Transect=="Aran"], na.rm=T),
              Aran_sd=sd(value[Transect=="Aran"], na.rm=T),
              Sentein_mean=mean(value[Transect=="Sentein"], na.rm=T),
              Sentein_sd=sd(value[Transect=="Sentein"], na.rm=T),
              Tourmalet_mean=mean(value[Transect=="Tourmalet"], na.rm=T),
              Tourmalet_sd=sd(value[Transect=="Tourmalet"], na.rm=T))%>%
    mutate(
           Aisa=str_c(round(Aisa_mean,2),"±",round(Aisa_sd,2)),
           Aran=str_c(round(Aran_mean,2),"±",round(Aran_sd,2)),
           Sentein=str_c(round(Sentein_mean,2),"±",round(Sentein_sd,2)),
           Tourmalet=str_c(round(Tourmalet_mean,2),"±",round(Tourmalet_sd,2))) %>% 
    arrange(-Aisa_mean) %>% 
    dplyr::select(alpha,Aisa,Aran,Sentein,Tourmalet) %>% 
    tt()
tinytable_rxkydepas6z4i96htmjx
alpha Aisa Aran Sentein Tourmalet
richness 198.77±65.73 153.32±72.26 257.32±71.08 245.04±68.73
neutral 87.81±36.55 71.13±36.69 110.98±45.84 101.5±43.7
phylogenetic 4.98±1.13 4.81±1.12 5.41±0.86 5.29±0.9
functional 1.5±0.05 1.5±0.06 1.52±0.03 1.5±0.04
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(., sample_metadata, by = join_by(sample == EHI_number)) %>%
  mutate(metric=factor(metric,levels=c("richness","neutral","phylogenetic","functional"))) %>%
      ggplot(aes(y = value, x = Transect, group=Transect, color=Transect, fill=Transect)) +
      geom_boxplot(outlier.shape = NA) +
      geom_jitter(alpha=0.5) +
      scale_color_manual(name="Transect",
          breaks=c("Aisa","Aran","Sentein","Tourmalet"),
          labels=c("Aisa","Aran","Sentein","Tourmalet"),
          values=c("#e5bd5b", "#6b7398","#e2815a", "#876b96")) +
      scale_fill_manual(name="Transect",
          breaks=c("Aisa","Aran","Sentein","Tourmalet"),
          labels=c("Aisa","Aran","Sentein","Tourmalet"),
          values=c("#e5bd5b50", "#6b739850","#e2815a50", "#876b9650")) +
      facet_wrap(. ~ metric, scales = "free", ncol=4) +
      coord_cartesian(xlim = c(1, NA)) +
      stat_compare_means(size=2) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        panel.grid.minor.x = element_line(size = .1, color = "grey"),
        axis.text.x = element_text(angle = 45, hjust = 1)
      ) +
  ylab("Alpha diversity")      

7.1.1 Regression plots

7.1.1.1 Richness diversity

columns <- c("richness","neutral","phylo","func","mapped","total")
alpha_div %>%
            select(sample,richness) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
      ggplot(aes(x = Elevation, y = value)) +
        geom_point() +
        geom_smooth(method = lm) +
        facet_wrap(~ factor(Transect))+
        labs(x = "Elevation (m)")

7.1.1.2 Neutral diversity

alpha_div %>%
            select(sample,neutral) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == EHI_number))  %>%
      ggplot(aes(x = Elevation, y = value)) +
        geom_point() +
        geom_smooth(method = lm) +
        facet_wrap(~ factor(Transect))+
        labs(x = "Elevation (m)")

7.1.1.3 Phylogenetic diversity

alpha_div %>%
            select(sample,phylogenetic) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
      ggplot(aes(x = Elevation, y = value)) +
        geom_point() +
        geom_smooth(method = lm) +
        facet_wrap(~ factor(Transect))+
        labs(x = "Elevation (m)")

7.1.1.4 Functional diversities

alpha_div %>%
            select(sample,functional) %>%
            pivot_longer(-sample, names_to = "data", values_to = "value") %>%
            mutate(data = factor(data, levels = columns))   %>%
            left_join(sample_metadata, by = join_by(sample == EHI_number))  %>%
      ggplot(aes(x = Elevation, y = value)) +
        geom_point() +
        geom_smooth(method = lm) +
        facet_wrap(~ factor(Transect))+
        labs(x = "Elevation (m)")

7.1.2 Mixed models

7.1.2.1 Richness diversity

richness_cor<-alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  lmerTest::lmer(richness ~ log(sequencing_depth) + Elevation*Transect + (1 | Sampling_point), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_1mfx4gdsbcnyarxel5ar
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) -1.556876e+03 213.86817441 -7.279608 102.67218 6.967323e-11
fixed NA log(sequencing_depth) 1.089104e+02 11.22012642 9.706700 101.19457 3.904741e-16
fixed NA Elevation -5.945361e-02 0.05333190 -1.114785 19.97815 2.781739e-01
fixed NA TransectAran -1.456675e+02 95.32890330 -1.528052 19.77139 1.423407e-01
fixed NA TransectSentein -1.282416e+02 98.14650386 -1.306634 20.96734 2.054881e-01
fixed NA TransectTourmalet -1.937802e+02 91.46019166 -2.118739 20.50091 4.650671e-02
fixed NA Elevation:TransectAran 9.264522e-02 0.06209640 1.491958 19.59034 1.516391e-01
fixed NA Elevation:TransectSentein 1.120611e-01 0.06399044 1.751216 21.21597 9.435497e-02
fixed NA Elevation:TransectTourmalet 1.204621e-01 0.05764375 2.089769 20.55707 4.926134e-02
ran_pars Sampling_point sd__(Intercept) 1.411515e+01 NA NA NA NA
ran_pars Residual sd__Observation 4.636663e+01 NA NA NA NA

7.1.2.2 Neutral diversity

neutral_cor<-alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  lmerTest::lmer(neutral ~ log(sequencing_depth) + Elevation*Transect + (1|Sampling_point), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_oaatuu0ksxd33y7uk2rg
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) -503.46092882 155.88510189 -3.229692 102.94139 1.663714e-03
fixed NA log(sequencing_depth) 38.53316355 8.30102716 4.641975 102.38427 1.026312e-05
fixed NA Elevation -0.04029799 0.03545720 -1.136525 17.82419 2.707869e-01
fixed NA TransectAran -69.54734931 63.33417863 -1.098101 17.59583 2.869598e-01
fixed NA TransectSentein -73.46205358 65.46915017 -1.122087 19.00703 2.758013e-01
fixed NA TransectTourmalet -97.07019202 60.91625317 -1.593502 18.45946 1.280264e-01
fixed NA Elevation:TransectAran 0.04308459 0.04122860 1.045017 17.37945 3.103332e-01
fixed NA Elevation:TransectSentein 0.05934449 0.04271777 1.389222 19.28043 1.806027e-01
fixed NA Elevation:TransectTourmalet 0.05957175 0.03839934 1.551374 18.50873 1.377430e-01
ran_pars Sampling_point sd__(Intercept) 6.64785214 NA NA NA NA
ran_pars Residual sd__Observation 34.74128890 NA NA NA NA

7.1.2.3 Phylogenetic diversity

phylo_cor<-alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  lmerTest::lmer(phylogenetic ~ log(sequencing_depth) + Elevation*Transect + (1 | Sampling_point), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_bgd6wf9whpft59izzfob
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 7.9723798522 4.1857148123 1.9046639 103.16183 0.05960912
fixed NA log(sequencing_depth) -0.0638680767 0.2249894059 -0.2838715 103.46820 0.77707614
fixed NA Elevation -0.0012366705 0.0008881157 -1.3924655 18.17137 0.18058793
fixed NA TransectAran -0.7649418524 1.5853816744 -0.4824970 17.90442 0.63529916
fixed NA TransectSentein -1.7363345713 1.6449704378 -1.0555415 19.63947 0.30399034
fixed NA TransectTourmalet -3.2228577094 1.5284405960 -2.1085921 18.97101 0.04850288
fixed NA Elevation:TransectAran 0.0002832543 0.0010313958 0.2746320 17.63476 0.78679044
fixed NA Elevation:TransectSentein 0.0014224302 0.0010740261 1.3243907 19.95532 0.20034248
fixed NA Elevation:TransectTourmalet 0.0022220955 0.0009635780 2.3060877 19.01432 0.03253476
ran_pars Sampling_point sd__(Intercept) 0.0488567254 NA NA NA NA
ran_pars Residual sd__Observation 0.9517063007 NA NA NA NA

7.1.2.4 Functional diversities

func_cor<-alpha_div %>%
  left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
  lmerTest::lmer(functional ~ log(sequencing_depth) + Elevation*Transect + (1 | Sampling_point), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
tinytable_j8yg2mncak46j1yeqn76
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 1.604632e+00 2.146120e-01 7.47689687 105 2.400124e-11
fixed NA log(sequencing_depth) -5.072130e-03 1.154428e-02 -0.43936293 105 6.613015e-01
fixed NA Elevation -1.096272e-05 4.526467e-05 -0.24219152 105 8.091042e-01
fixed NA TransectAran 2.877083e-02 8.079764e-02 0.35608504 105 7.224913e-01
fixed NA TransectSentein -5.637786e-03 8.386423e-02 -0.06722516 105 9.465303e-01
fixed NA TransectTourmalet -6.778434e-02 7.791307e-02 -0.86999962 105 3.862852e-01
fixed NA Elevation:TransectAran -2.198663e-05 5.256109e-05 -0.41830623 105 6.765775e-01
fixed NA Elevation:TransectSentein 1.445851e-05 5.475949e-05 0.26403659 105 7.922692e-01
fixed NA Elevation:TransectTourmalet 4.317958e-05 4.911932e-05 0.87907520 105 3.813678e-01
ran_pars Sampling_point sd__(Intercept) 0.000000e+00 NA NA NA NA
ran_pars Residual sd__Observation 4.887921e-02 NA NA NA NA

7.2 Beta diversity

beta_q0n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 0)

beta_q1n <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1)

beta_q1p <- genome_counts_filt %>%
  column_to_rownames(., "genome") %>%
  hillpair(., q = 1, tree = genome_tree)

beta_q1f <- genome_counts_filt_filt %>%
  #column_to_rownames(., "genome") %>%
  hillpair(., q = 1, dist = dist)